In [1]:
    
import numpy as np
from scipy.linalg import *
from drl.env.arm import TwoLinkArm
import time
%matplotlib notebook
    
In [2]:
    
epsilon = 1e-5
Ts = 10
env = TwoLinkArm(g=0.)
Q = np.eye(env.state_dim)*100.
R = np.eye(env.action_dim)*1.
    
In [3]:
    
def calc_derivatives(x, u):
    A = np.zeros((env.state_dim, env.state_dim))
    for i in range(env.state_dim):
        x_tmp = x.copy()
        x_tmp[i] += epsilon
        _, f_1 = env.dynamics_func(x_tmp, u)
        x_tmp = x.copy()
        x_tmp[i] -= epsilon
        _, f_2 = env.dynamics_func(x_tmp, u)
        fxdx = (f_1 - f_2) / (2*epsilon)
        A[:, i] = fxdx
        
    B = np.zeros((env.state_dim, env.action_dim))
    for i in range(env.action_dim):
        u_tmp = u.copy()
        u_tmp[i] += epsilon
        _, f_1 = env.dynamics_func(x, u_tmp)
        u_tmp = u.copy()
        u_tmp[i] -= epsilon
        _, f_2 = env.dynamics_func(x, u_tmp)
        fxdu = (f_1 - f_2) / (2*epsilon)
        B[:, i] = fxdu
        
    return A, B
    
In [4]:
    
def run_experiment():
    x = env.reset()
    env.render()
    u = [0.]*env.action_dim
    
    V = 0.
    for _ in range(int(Ts/env.dt)):
        # Calculate optimal feedback gain K
        error = x - env.get_goal()
        A, B = calc_derivatives(error, u)
        P = solve_continuous_are(A, B, Q, R)
        K = np.dot(np.linalg.pinv(R), np.dot(B.T, P))
        
        u = -np.dot(K, error)
        x, _, _, _ = env.step(u)
        
        env.render()
        
        V += np.dot(error.T, np.dot(P, error))
    
    print('Episode %s - Total Cost: %s' % (str(i), V))
    return env.goal - x
    
In [5]:
    
for i in range(5):
    error = run_experiment()
    
    
    
In [ ]:
    
env.render(close=True)
    
In [ ]:
    
num_targets = 5
def run_experiment2():
    x = env.reset()
    env.render()
    u = [0.]*env.action_dim
    
    V_tot = 0.
    for i in range(num_targets):
        env.set_goal()
        goal_reached = False
        
        while not goal_reached:
            error = x - env.get_goal()
            A, B = calc_derivatives(error, u)
            P = solve_continuous_are(A, B, Q, R)
            K = np.dot(np.linalg.pinv(R), np.dot(B.T, P))
            u = -np.dot(K, error)
            x, _, _, _ = env.step(u)
            env.render()
            
            V = np.dot(error.T, np.dot(P, error))
            V_tot += V
            if V < 1e-4:
                goal_reached = True
                
        print('Target %s - Total Cost: %s' % (str(i), V_tot))
    
In [ ]:
    
run_experiment2()
    
In [ ]:
    
env.render(close=True)